These would need need to be imported in finalized package
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(amt)
##
## Attaching package: 'amt'
## The following object is masked from 'package:stats':
##
## filter
library(raster)
## Loading required package: sp
##
## Attaching package: 'sp'
## The following object is masked from 'package:amt':
##
## bbox
##
## Attaching package: 'raster'
## The following object is masked from 'package:amt':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(parallel)
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:raster':
##
## shift
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(progress)
library(pbapply)
library(pbmcapply)
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:raster':
##
## union
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
To side-step issues where rasters are of different resolution, we can create our own custom grid for predictions that match the most confined extents of the rasters. We also might want to specify the resolution of that underlying raster. If that raster is in utm (as defined below), we can specify x and y cell sizes
create_mock_surface <- function(raster.list, multiple.extents = F, resolution = list(x = 100, y = 100)){
# If rasters are all of the same extent, take the extent
if(multiple.extents == F){
xmin <- extent(raster.list)[1]
xmax <- extent(raster.list)[2]
ymin <- extent(raster.list)[3]
ymax <- extent(raster.list)[4]
}
# If rasters are of different extent, take the overlap extent
if(multiple.extents == T){
xmin <- max(unlist(lapply(raster.list, function(x) {extent(x)[1]})))
xmax <- max(unlist(lapply(raster.list, function(x) {extent(x)[2]})))
ymin <- max(unlist(lapply(raster.list, function(x) {extent(x)[3]})))
ymax <- max(unlist(lapply(raster.list, function(x) {extent(x)[4]})))
}
# Create new raster
mock.surface <- raster(
ncol=(xmax-xmin)/resolution$x, # raster automatically rounds, total cols
nrow=(ymax-ymin)/resolution$y, # raster automatically rounds, total rows
xmn=xmin, # min x exent
xmx=xmax, # max x exent
ymn=ymin, # min y exent
ymx=ymax, # max y exent
crs = crs(raster.list[[1]])) # take CRS from first raster, requires that rasters match in CRS
values(mock.surface) <- 1:(ncol(mock.surface)*nrow(mock.surface)) # not important, just for visualization
return(mock.surface)
}
We should check that a model is correctly specified, before throwing errors down the line.
check_ssf <- function(ssf.obj) {
inherits(ssf.obj, c("fit_clogit")) # not broad enough, basically just from smt at the moment
}
We should create a reasonable null step, and we can do so by using the estimated gamma distribution from amt. However, we could just as easily take quantiles from the distribution of step lengths we observe, instead.
step_distance <- function(ssf.obj, quantile) {
if(!check_ssf(ssf.obj)) stop("Check that SSF model is valid")
step <- qgamma(quantile, # user specified quantile
shape = ssf.obj$sl_$params$shape, # estimated from amt
scale = ssf.obj$sl_$params$scale) # estimated from amt
return(step)
}
Now that we have a valid SSF object and a prediction surface, we need to find our prediction cells of interest. Lets get our raster data.
get_cells <- function(ssf.obj, mock.surface, raster, accessory.x.preds = NULL){
if(!check_ssf(ssf.obj)) stop("Check that SSF model is valid")
pred.xy <- raster::coordinates(mock.surface) # get coordinates from grid we created
predict.data <- data.frame(cbind(pred.xy, raster::extract(raster, pred.xy, df=TRUE))) # makes raster values a data frame
predict.data$step_id_unique = ssf.obj$model$xlevels$`strata(step_id_)`[1] # fix the strata to something reasonable
if(!is.null(accessory.x.preds)) {
predict.data <- cbind(predict.data, accessory.x.preds) # this adds extraneous x values that are not in matrix
}
cells <- nrow(pred.xy) # number of cells
predict.data$cellnr <- 1:cells # assign cell numbers, redundant of ID
return(predict.data)
}
Not all combinations of parameter space are valid, and we often are missing raster data at edges
get_cell_data <- function(ssf.obj, pred.data){
if(!check_ssf(ssf.obj)) stop("Check that SSF model is valid")
cells <- nrow(pred.data) # number of cells
# relying on amt step-selection models, we can predict log-RSS
# there are better ways to predict, but this is simple
log.rss <- amt::log_rss(ssf.obj, # the model
pred.data, # the raster data (including missing values)
pred.data %>%
drop_na() %>%
sample_n(1), # a row of the raster data (excluding missing values)
ci = NA)
full.raster.data <- data.frame(pred.data, lRSS = log.rss$df$log_rss) # bind predictions to the original data
return(full.raster.data)
}
We need to find neighbors of cells in our matrix. This is easy, but could require n^3 comparisons. So many comparisons are computationally inefficient. One thing we can rely on is that all cells in a raster (or matrix) can be indexed. Additionally, distances between cells in a raster are repetitive. In general, there are only ~0.2% unique pairwise distances between cell indices. We can find these unique distances because we know the difference in index of the comparisons, the column identify of the minimum index, and the observed distance. This allows us to have a table that we can call repetitively instead of recalculating millions of distances.
neighbor_lookup <- function(mock.surface, cell.data, cell.data.list = NULL){
cols <- mock.surface@ncols # columns in prediction
rows <- mock.surface@nrows # rows in prediction
cells <- cols*rows # number of cells
index <- 1:cells # all index values in our prediction raster
# create a matrix for each column in the first row and its comparisons to distance to all other cells
if(is.null(cell.data.list)){
print("Splitting cell.data into list")
cell.data.list <- split(cell.data, cell.data$cellnr) # split the prediction data into row-wise lists to use lapply
}
if(!is.null(cell.data.list)){
print("Using inputted list of cell data")
cell.data.list <- cell.data.list # split the prediction data into row-wise lists to use lapply
}
# this is a progress bar we can use in a for loop
pb <- progress_bar$new(format = "(:spin) [:bar] :percent [Elapsed time: :elapsedfull || Estimated time remaining: :eta]", total = cols, complete = "=", incomplete = "-", current = ">", clear = FALSE, width = 100)
for(i in 1:cols) { # step through columns
dist <- pointDistance(cell.data[i,c("x","y")], # choose the first row cell by column (raster indices are row-wise)
cell.data[i:cells,c("x","y")], # choose all other cells
lonlat = F) # we are using UTM
if(i == 1) mat.dist <- matrix(dist, ncol = 1)
if(i > 1) mat.dist <- cbind(mat.dist, c(dist, rep(NA, i-1))) # for each additional column, there are i-1 comparisons that are repeated (unnecessary)
pb$tick() # for progress bar
}
return(mat.dist)
}
Now that we have our call-up table, we can use it to generate neighbors. This is arguably one of the most taxing (computationally) steps of the whole process. Luckily im a damn genius and found ways to make this even faster than the call-up table.
neighbor_finder <- function(ssf.obj = m2, cell.data, neighbors.found, quantile = 0.99, cell.data.list = NULL){
neighborhood.distance <- step_distance(ssf.obj, quantile) # take the X% step distance as your neighborhood
cols <- ncol(neighbors.found) # columns of our call-up table
differences <- nrow(neighbors.found) # number of differences in index values
print("Creating neighbor comparisons")
vector <- c(neighbors.found) # convert the neighbors to a vector we can index later, this is the purpose of all those NA's earlier based on i-1 unique distances
print("Finding valid comparisons")
valid <- vector < neighborhood.distance # T/F whether those neighborhood distances are less than our threshold
if(is.null(cell.data.list)){
print("Splitting cell.data into list")
cell.data.list <- split(cell.data, cell.data$cellnr) # split the prediction data into row-wise lists to use lapply
}
if(!is.null(cell.data.list)){
print("Using inputted list of cell data")
cell.data.list <- cell.data.list # split the prediction data into row-wise lists to use lapply
}
print("Running comparisons")
neighbor.mat <- pblapply(cell.data.list, function(x){ # step through each row (see list split above)
focal <- as.numeric(x$cellnr) # value of row cell number
delta <- abs(focal - cell.data$cellnr) # difference in row cell number vs all others
index <- ifelse(focal < cell.data$cellnr, focal, cell.data$cellnr) # report the minimum cell index (based on our call-up structure)
index.col <- index%%cols # use the remainder function to get the column number (see below, we have to force zeros to the column number because the remainder of the final column is zero)
df <- data.frame(difference = delta + 1, # we have to add one because differences of 0 are stored in row 1, differences of 1 in row 2, etc.
col = ifelse(index.col == 0, cols, index.col), # forcing remainders of zero the number of columns
cell.nr = cell.data$cellnr) # just tracking the cell number we are comparing against for use later
# filter the data frame based on whether our call-up values are less than the neighborhood
df <- df %>%
filter(valid[difference+((col-1)*differences)])
# filter the data set to unique rows and columns for call-up (to accommodate memory issues)
df.distinct <- df %>% distinct(difference, col)
# find the unique distances (trims time down)
df.distinct$distances <- vector[df.distinct$difference + ((df.distinct$col - 1)*differences)]
# throw the unique distances back to the full data set
df <- merge(df, df.distinct, by = c("difference","col"))
# package into a nice data frame for export
data.frame(row = focal, column = df$cell.nr, distance = df$distances)
})
# bind the output list
neighbors <- rbindlist(neighbor.mat)
# create a sparse matrix based on the focal id, alternate id, and distance
sparse.neighbors <- sparseMatrix(i = neighbors$row, j = neighbors$column, x = neighbors$distance)
# return both the matrix and the unbound list of neighbor cells
return(list(matrix = sparse.neighbors, by.cell = neighbor.mat))
}
Now that we have data divided so that we know the neighbors of a
focal cell, we can split the data into a format that is conducive to SSF
predictions. We split these into two data frames, .given
for the focal cell and .for for the neighboring cells.
compile_ssf_comparisons <- function(sparse.neighbors, cell.data) {
# this is why the export of the neighbors as individual lists was important
ssf.comparisons <- lapply(sparse.neighbors$by.cell, function(x){
baseline <- cell.data[x$column[which(x$distance == 0)],] # baseline will have a distance of zero (focal)
baseline$step <- 0 # create a variable "step" that records this zero distance
alternate <- cell.data[x$column,] # grab all the other cell.data for neighboring cells (including focal cell)
alternate$step <- x$distance # force distance to this new variable step
list(.given = baseline, .for = alternate) # return a list of focal and neigboring cell data
})
return(ssf.comparisons)
}
Using our fit movement model and the metadata for our prediction surface, we can estimate the relative risk of selecting the focal cell and all other cells in the neighborhood. We can show that all probabilities of “choosing” a cell in the prediction surface must sum to one, thus the probability of selecting the focal cell is the inverse of he sum of all relative probabilites. From log-RSS, we just exponentiate, and take the inverse sum. To find the probability of choosing all cells, we just multiply the probability of selecting the focal cell against all relative risks! Easy! This tells us the probability of selecting each of those cells given the comparison to the focal cell, so not the out-right probability of selection, but it gets us closer.
predict_ssf_comparisons <- function(ssf.obj = m2, ssf.comparisons) {
print("Estimating probability surface")
prediction.list <- pbmclapply(ssf.comparisons, function(x){ # step through the list of SSF objects
log.rss <- log_rss(ssf.obj, x$.for, x$.given, ci = NA) # get the log-RSS for each comparison
x$.for$Prob <- exp(log.rss$df$log_rss)*(1/sum(exp(log.rss$df$log_rss))) # exponentiate and multiply agains relative risk
x$.for # return the data frame with probabilities
})
print("Compiling probability surface")
for(i in 1:length(prediction.list)){
prediction.list[[i]]$focal.cell <- i # specify the focal cell for each comparison
}
print("Making sparse matrix for transitions")
bound <- rbindlist(prediction.list) # bind all data frames
# use indexing to make a massive sparse matrix quickly
Sparse.Matrix <- sparseMatrix(bound$focal.cell, bound$cellnr, x = bound$Prob)
# return the prediction list and sparse matrix
return(list(prob.surface = prediction.list, sparse.matrix = Sparse.Matrix))
}
This is a demonstration data set from the amt package. Single deer from Northern Europe and a binary forest layer as a covariate.
We read in data, create 15 random steps for each real step, and then store forest as a factor, turning angle as cosine(ta) and step length as log plus 1 (this saves us later when we compare to a focal cell with “0” step distance)
data("deer")
data("sh_forest")
ssf1 <- deer %>%
steps_by_burst() %>%
random_steps(n_control = 15) %>%
extract_covariates(sh_forest) %>%
mutate(sh.forest = factor(sh.forest),
cos_ta = cos(ta_),
log_sl = log(sl_+1))
par(mfrow = c(1,2))
plot(1-(sh_forest-1))
plot(ssf1)
We can fit a basic movement model; it appears that a forest:step interaction is favored.
m2 <- ssf1 %>%
filter(!is.na(cos_ta)) %>%
fit_clogit(case_ ~ sh.forest * poly(log_sl,2) + strata(step_id_), model = T)
summary(m2)
## Call:
## coxph(formula = Surv(rep(1, 12624L), case_) ~ sh.forest * poly(log_sl,
## 2) + strata(step_id_), data = data, model = ..1, method = "exact")
##
## n= 12624, number of events= 759
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sh.forest2 -4.724e-01 6.235e-01 1.101e-01 -4.289 1.79e-05
## poly(log_sl, 2)1 2.746e+01 8.438e+11 8.155e+00 3.368 0.000758
## poly(log_sl, 2)2 9.019e+00 8.260e+03 8.124e+00 1.110 0.266941
## sh.forest2:poly(log_sl, 2)1 -4.154e+01 9.136e-19 9.997e+00 -4.155 3.26e-05
## sh.forest2:poly(log_sl, 2)2 -3.479e+01 7.753e-16 1.011e+01 -3.441 0.000580
##
## sh.forest2 ***
## poly(log_sl, 2)1 ***
## poly(log_sl, 2)2
## sh.forest2:poly(log_sl, 2)1 ***
## sh.forest2:poly(log_sl, 2)2 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sh.forest2 6.235e-01 1.604e+00 5.025e-01 7.737e-01
## poly(log_sl, 2)1 8.438e+11 1.185e-12 9.661e+04 7.369e+18
## poly(log_sl, 2)2 8.260e+03 1.211e-04 1.003e-03 6.801e+10
## sh.forest2:poly(log_sl, 2)1 9.136e-19 1.095e+18 2.825e-27 2.955e-10
## sh.forest2:poly(log_sl, 2)2 7.753e-16 1.290e+15 1.914e-24 3.140e-07
##
## Concordance= 0.592 (se = 0.012 )
## Likelihood ratio test= 62.84 on 5 df, p=3e-12
## Wald test = 60.58 on 5 df, p=9e-12
## Score (logrank) test = 62.49 on 5 df, p=4e-12
We can make our surface for predictions - rememeber, we set initial values to their cell index number.
mock.surface <- create_mock_surface(sh_forest, F, list(x = 250, y = 250))
plot(mock.surface)
lines(deer)
We can get our prediction data. We can set forest to a factor again to get the model predictions to work.
pred.data <- get_cells(m2,
mock.surface,
sh_forest,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data$sh.forest <- factor(pred.data$sh.forest)
head(pred.data)
## x y ID sh.forest step_id_unique log_sl cos_ta cellnr
## 1 4304850 3455600 1 1 step_id_=1 5.413297 1 1
## 2 4305100 3455600 2 2 step_id_=1 5.413297 1 2
## 3 4305351 3455600 3 2 step_id_=1 5.413297 1 3
## 4 4305601 3455600 4 2 step_id_=1 5.413297 1 4
## 5 4305852 3455600 5 2 step_id_=1 5.413297 1 5
## 6 4306102 3455600 6 2 step_id_=1 5.413297 1 6
We can fit our original SSF sensu other publications, where a baseline is chosen and run with. It looks like a RSF, but is it?
cell.data <- get_cell_data(m2, pred.data)
plot(setValues(mock.surface, cell.data$lRSS))
points(deer, pch = ".", col = alpha("black", 0.5))
Lets make our neighbor look-up matrix. We can plot it here as a raster, but its shows the general idea.
neighbors.found <- neighbor_lookup(mock.surface, cell.data)
## [1] "Splitting cell.data into list"
## [1] "Using inputted list of cell data"
plot(raster(neighbors.found))
We can use our look-up matrix to find all of our neighbors. Here, we can see a few examples of how this works.
sparse.neighbors <- neighbor_finder(m2, cell.data, neighbors.found, quantile = 0.99)
## [1] "Creating neighbor comparisons"
## [1] "Finding valid comparisons"
## [1] "Splitting cell.data into list"
## [1] "Using inputted list of cell data"
## [1] "Running comparisons"
par(mfrow = c(2,3))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
plot(setValues(mock.surface, sparse.neighbors$matrix[sample(1:nrow(sparse.neighbors$matrix), 1),]))
We can create all the comparisons between focal and non-focal cells.
We need to add some extraneous things like creating our
log_sl and cos_ta values. Again, log plus one
for all the step lengths to accommodate zeros. Functionally, our
specification of turning angle is BS, but it shouldn’t affect
inferences. In the future, turning angle could be estimated based on the
turning angle (if we wanted to get fancy).
ssf.comparisons <- compile_ssf_comparisons(sparse.neighbors, cell.data)
ssf.comparisons <- lapply(ssf.comparisons, function(x) {
x$.for$log_sl <- log(x$.for$step+1)
x$.given$log_sl <- log(x$.given$step+1)
x$.for$cos_ta <- 0
x$.given$cos_ta <- 0
list(.for = x$.for, .given = x$.given)
})
head(ssf.comparisons$`1`$.for)
## x y ID sh.forest step_id_unique log_sl cos_ta cellnr
## 1 4304850 3455600 1 1 step_id_=1 0.000000 0 1
## 151 4304850 3455100 151 2 step_id_=1 6.216606 0 151
## 152 4305100 3455100 152 2 step_id_=1 6.328233 0 152
## 153 4305351 3455100 153 2 step_id_=1 6.563261 0 153
## 154 4305601 3455100 154 2 step_id_=1 6.805966 0 154
## 155 4305852 3455100 155 2 step_id_=1 7.021286 0 155
## lRSS step
## 1 0.3488135 0.0000
## 151 0.0000000 500.0000
## 152 0.0000000 559.1661
## 153 0.0000000 707.5783
## 154 0.0000000 902.2200
## 155 0.0000000 1119.2267
We can now predict our whole surface based on the transition probabilities between each focal and each neighbor cell. We can visualize some below.
surface <- predict_ssf_comparisons(m2, ssf.comparisons)
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
par(mfrow = c(3,4), mai = c(0, 0, 0.1, 0))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
plot(setValues(mock.surface, surface$sparse.matrix[sample(1:nrow(surface$sparse.matrix), 1),]))
We can use these relationships to make a graph. We will trim any NA nodes and edges, but we should record their identity for mapping back to the prediction surface. This graph is absolutely huge, so it not really legible and is not plotted here.
g <- graph_from_adjacency_matrix(surface$sparse.matrix, mode = "directed", weighted = T, diag = T)
V(g)$name <- V(g)
Isolated <- which(degree(g)==0)
Connected <- which(degree(g)>0)
g <- delete.vertices(g, Isolated)
g <- delete.edges(g, E(g)[is.na(weight)])
# plot(g)
We can calculate PageRank centrality, which is arguably the closest thing to an RSF-like definition. As you can see, its different than the above RSS surface.
pg <- page_rank(g)
page.rank.raster <- setValues(mock.surface, pg$vect)
plot(page.rank.raster)
points(deer, pch = ".", col = alpha("black", 0.25))
We can also calculate other things, like the sum of all probabilities favoring a cell across all pertinent neighbors. Or the diagonal of our transition matrix, which is the probability of selecting a focal cell given the neighborhood.
diag <- diag(surface$sparse.matrix)
sum.prob <- colSums(surface$sparse.matrix)
sum.m.diag <- sum.prob - diag
neighbors <- unlist(lapply(ssf.comparisons, function(x) nrow(x$.for)))
par(mfrow = c(2,2), mai = c(0,0,0.2,0))
plot(setValues(mock.surface, diag), main = "Diagonal")
plot(setValues(mock.surface, sum.prob), main = "Sum")
plot(setValues(mock.surface, sum.m.diag), main = "Sum - Diagonal")
plot(setValues(mock.surface, neighbors), main = "Neighbors")
Here, we can run with summed probabilities as a first proxy. We can see there are real, tangible differences from PageRank.
sum.prob.raster <- setValues(mock.surface, sum.prob)
plot(spatialEco::rasterCorrelation(page.rank.raster, sum.prob.raster))
points(deer, pch = ".", col = alpha("black", 0.25))
We can make a mock RSF, using our deer data set in a super flawed, overfit way. Interestingly, this RSF is PERFECTLY correlated with our prior Log-RSS surface - so pretty cool.
rsf <- deer %>%
random_points() %>%
extract_covariates(sh_forest) %>%
mutate(sh.forest = factor(sh.forest))
rsf <- rsf %>%
fit_rsf(case_ ~ sh.forest, model = T)
probabilities <- exp(predict(rsf$model, newdata = pred.data))/(1+exp(predict(rsf$model, newdata = pred.data)))
rsf.prob.raster <- setValues(mock.surface, probabilities)
plot(rsf.prob.raster)
There are, however, deviations from both the base probabilities and the
PageRank
par(mfrow = c(1,2))
plot(spatialEco::rasterCorrelation(page.rank.raster, rsf.prob.raster))
points(deer, pch = ".", col = alpha("black", 0.25), main = "RSF vs PageRank")
plot(spatialEco::rasterCorrelation(sum.prob.raster, rsf.prob.raster))
points(deer, pch = ".", col = alpha("black", 0.25), main = "RSF vs Probability")
We can also compare the data they contain, and what predictive power they have for the deer locations we observed.
ssf1 <- deer %>%
extract_covariates(stack(rsf.prob.raster, sum.prob.raster, page.rank.raster))
quantiles.pred <- quantile(values(rsf.prob.raster), seq(0.01, 1, by = 0.01), na.rm = T)
percent.in.a <- rep(0,100)
for (i in 1:(length(quantiles.pred))){
yup <- ssf1$layer.1 <= quantiles.pred[i]
percent.in.a[i] <- sum(yup, na.rm = T)
}
percent.in.a <- percent.in.a/table(is.na(ssf1$layer.1))[1]
quantiles.pred <- quantile(values(sum.prob.raster), seq(0.01, 1, by = 0.01), na.rm = T)
percent.in.b <- rep(0,100)
for (i in 1:(length(quantiles.pred))){
yup <- ssf1$layer.2 <= quantiles.pred[i]
percent.in.b[i] <- sum(yup, na.rm = T)
}
percent.in.b <- percent.in.b/table(is.na(ssf1$layer.2))[1]
quantiles.pred <- quantile(values(page.rank.raster), seq(0.01, 1, by = 0.01), na.rm = T)
percent.in.c <- rep(0,100)
for (i in 1:(length(quantiles.pred))){
yup <- ssf1$layer.3 <= quantiles.pred[i]
percent.in.c[i] <- sum(yup, na.rm = T)
}
percent.in.c <- percent.in.c/table(is.na(ssf1$layer.3))[1]
tibble(percent_pred = rep(seq(0, 1, by = 0.01)[-1],3),
precent_obs = c(1-percent.in.a,
1-percent.in.b,
1-percent.in.c),
pred_type = rep(c("RSF", "Summed Probability iSFF", "PageRank iSFF"),
each = 100)) %>%
group_by(pred_type) %>%
# mutate(cum_percent = cumsum(precent_obs)) %>%
ggplot(aes(x = percent_pred, y = precent_obs, color = pred_type)) +
geom_point() +
geom_line() +
theme_minimal() +
labs(x = "Percentile of Predicted Surface",
y = "Cumulative Percent of Observed Data by Percentile")
plot(page.rank.raster>quantiles.pred[min(which((1-percent.in.c) < 0.9))])
points(amt_fisher, pch = ".")
This is data for a few fishers with elevation, land cover, and population density. I added aspect, TRI, and slope for fun. All the subheadings are largely the same, so I won’t go into too much detail.
I created some new variables, and because extents are all different, they have to be called separately for pulling data. This is relevant later.
data("amt_fisher")
data("amt_fisher_covar")
amt_fisher_covar$slope <- terrain(amt_fisher_covar$elevation, opt="slope", neighbors=8, unit="degrees")
amt_fisher_covar$aspect <- terrain(amt_fisher_covar$elevation, opt="aspect", neighbors=8, unit="degrees")
amt_fisher_covar$TRI <- terrain(amt_fisher_covar$elevation, opt="TRI", neighbors=8, unit="degrees")
ssf1 <- amt_fisher %>%
filter(name == "Leroy") %>%
track_resample(rate = minutes(15), tolerance = minutes(3)) %>%
filter_min_n_burst(min_n = 3) %>%
steps_by_burst() %>%
random_steps() %>%
extract_covariates(amt_fisher_covar$landuse) %>%
extract_covariates(amt_fisher_covar$elevation) %>%
extract_covariates(amt_fisher_covar$slope) %>%
extract_covariates(amt_fisher_covar$aspect) %>%
extract_covariates(amt_fisher_covar$TRI) %>%
extract_covariates(amt_fisher_covar$popden) %>%
mutate(landC = factor(landuse),
# case_when(
# landuse %in% c(81, 82) ~ "agri",
# landuse %in% c(41, 42, 43) ~ "forest",
# landuse %in% c(52) ~ "shrub",
# landuse %in% c(31, 71) ~ "grass",
# landuse %in% c(90,95) ~ "wet",
# landuse %in% c(11, 21, 22, 23, 24) ~ "other"),
cos_ta = cos(ta_),
log_sl = log(sl_+1),
forest = factor(landC == 50))
m2 <- ssf1 %>%
fit_clogit(case_ ~ (elevation + tri + popden + slope + aspect + cos_ta + log_sl)^2 + strata(step_id_), model = T)
summary(m2)
## Call:
## coxph(formula = Surv(rep(1, 8976L), case_) ~ (elevation + tri +
## popden + slope + aspect + cos_ta + log_sl)^2 + strata(step_id_),
## data = data, model = ..1, method = "exact")
##
## n= 8908, number of events= 748
## (68 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## elevation 1.860e-01 1.204e+00 3.533e-02 5.263 1.42e-07 ***
## tri 1.283e+00 3.608e+00 5.751e-01 2.231 0.025650 *
## popden 1.355e-02 1.014e+00 3.963e-03 3.419 0.000629 ***
## slope 1.427e-02 1.014e+00 5.053e-01 0.028 0.977478
## aspect 1.573e-02 1.016e+00 8.011e-03 1.963 0.049623 *
## cos_ta 4.869e-01 1.627e+00 1.056e+00 0.461 0.644839
## log_sl 1.167e+00 3.211e+00 4.353e-01 2.680 0.007352 **
## elevation:tri -1.121e-02 9.889e-01 4.966e-03 -2.256 0.024046 *
## elevation:popden -1.206e-04 9.999e-01 4.011e-05 -3.008 0.002632 **
## elevation:slope -1.367e-03 9.986e-01 4.692e-03 -0.291 0.770708
## elevation:aspect -1.251e-04 9.999e-01 8.058e-05 -1.553 0.120482
## elevation:cos_ta -1.492e-02 9.852e-01 1.041e-02 -1.433 0.151737
## elevation:log_sl -1.111e-02 9.890e-01 4.386e-03 -2.532 0.011345 *
## tri:popden -2.507e-04 9.997e-01 2.227e-04 -1.126 0.260355
## tri:slope 2.659e-02 1.027e+00 1.494e-02 1.779 0.075215 .
## tri:aspect -1.854e-05 1.000e+00 5.315e-04 -0.035 0.972171
## tri:cos_ta -9.505e-02 9.093e-01 6.214e-02 -1.529 0.126154
## tri:log_sl -7.866e-03 9.922e-01 2.781e-02 -0.283 0.777276
## popden:slope -2.221e-04 9.998e-01 1.746e-04 -1.272 0.203289
## popden:aspect -1.269e-06 1.000e+00 1.537e-06 -0.826 0.409032
## popden:cos_ta -2.104e-04 9.998e-01 2.099e-04 -1.003 0.316067
## popden:log_sl -7.760e-05 9.999e-01 9.221e-05 -0.842 0.400026
## slope:aspect -5.399e-04 9.995e-01 3.618e-04 -1.492 0.135670
## slope:cos_ta 4.176e-02 1.043e+00 4.282e-02 0.975 0.329428
## slope:log_sl 2.024e-02 1.020e+00 1.990e-02 1.017 0.309076
## aspect:cos_ta -2.209e-04 9.998e-01 5.864e-04 -0.377 0.706361
## aspect:log_sl -3.303e-04 9.997e-01 2.660e-04 -1.242 0.214335
## cos_ta:log_sl 2.770e-01 1.319e+00 3.272e-02 8.465 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## elevation 1.2044 0.8303 1.1238 1.2907
## tri 3.6084 0.2771 1.1690 11.1382
## popden 1.0136 0.9865 1.0058 1.0215
## slope 1.0144 0.9858 0.3768 2.7310
## aspect 1.0159 0.9844 1.0000 1.0319
## cos_ta 1.6272 0.6146 0.2053 12.8969
## log_sl 3.2114 0.3114 1.3684 7.5369
## elevation:tri 0.9889 1.0113 0.9793 0.9985
## elevation:popden 0.9999 1.0001 0.9998 1.0000
## elevation:slope 0.9986 1.0014 0.9895 1.0079
## elevation:aspect 0.9999 1.0001 0.9997 1.0000
## elevation:cos_ta 0.9852 1.0150 0.9653 1.0055
## elevation:log_sl 0.9890 1.0112 0.9805 0.9975
## tri:popden 0.9997 1.0003 0.9993 1.0002
## tri:slope 1.0269 0.9738 0.9973 1.0575
## tri:aspect 1.0000 1.0000 0.9989 1.0010
## tri:cos_ta 0.9093 1.0997 0.8051 1.0271
## tri:log_sl 0.9922 1.0079 0.9395 1.0477
## popden:slope 0.9998 1.0002 0.9994 1.0001
## popden:aspect 1.0000 1.0000 1.0000 1.0000
## popden:cos_ta 0.9998 1.0002 0.9994 1.0002
## popden:log_sl 0.9999 1.0001 0.9997 1.0001
## slope:aspect 0.9995 1.0005 0.9988 1.0002
## slope:cos_ta 1.0426 0.9591 0.9587 1.1339
## slope:log_sl 1.0204 0.9800 0.9814 1.0610
## aspect:cos_ta 0.9998 1.0002 0.9986 1.0009
## aspect:log_sl 0.9997 1.0003 0.9991 1.0002
## cos_ta:log_sl 1.3191 0.7581 1.2372 1.4065
##
## Concordance= 0.668 (se = 0.013 )
## Likelihood ratio test= 221 on 28 df, p=<2e-16
## Wald test = 209.2 on 28 df, p=<2e-16
## Score (logrank) test = 226.2 on 28 df, p=<2e-16
mock.surface <- create_mock_surface(amt_fisher_covar, T, list(x = 200, y = 200))
Because we cant pull all underlying raster data in one pull, we have to do it multiple times for each variable and then merge. Then we have to sort by cell number to get things square.
pred.data.l <- get_cells(m2,
mock.surface,
amt_fisher_covar$landuse,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data.e <- get_cells(m2,
mock.surface,
amt_fisher_covar$elevation,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data.p <- get_cells(m2,
mock.surface,
amt_fisher_covar$popden,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data.s <- get_cells(m2,
mock.surface,
amt_fisher_covar$slope,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data.a <- get_cells(m2,
mock.surface,
amt_fisher_covar$aspect,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data.t <- get_cells(m2,
mock.surface,
amt_fisher_covar$TRI,
accessory.x.preds = list(log_sl = log(step_distance(m2, 0.5)),
cos_ta = 1))
pred.data <- merge(
merge(
merge(
merge(
merge(pred.data.l,pred.data.e),
pred.data.p),
pred.data.s),
pred.data.a),
pred.data.t)
pred.data <- pred.data %>%
arrange(cellnr)
pred.data$forest <- factor(pred.data$landuse == 50)
cell.data <- get_cell_data(m2, pred.data)
neighbors.found <- neighbor_lookup(mock.surface, cell.data)
## [1] "Splitting cell.data into list"
## [1] "Using inputted list of cell data"
sparse.neighbors <- neighbor_finder(m2, cell.data, neighbors.found, quantile = 0.99)
## [1] "Creating neighbor comparisons"
## [1] "Finding valid comparisons"
## [1] "Splitting cell.data into list"
## [1] "Using inputted list of cell data"
## [1] "Running comparisons"
ssf.comparisons <- compile_ssf_comparisons(sparse.neighbors, cell.data)
ssf.comparisons <- lapply(ssf.comparisons, function(x) {
x$.for$log_sl <- log(x$.for$step+1)
x$.given$log_sl <- log(x$.given$step+1)
x$.for$cos_ta <- 0
x$.given$cos_ta <- 0
list(.for = x$.for, .given = x$.given)
})
surface <- predict_ssf_comparisons(m2, ssf.comparisons)
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
g <- graph_from_adjacency_matrix(surface$sparse.matrix, mode = "directed", weighted = T, diag = T)
V(g)$name <- V(g)
Isolated <- which(degree(g)==0)
Connected <- which(degree(g)>0)
g <- delete.vertices(g, Isolated)
g <- delete.edges(g, E(g)[is.na(weight)])
pg <- page_rank(g)
values <- rep(NA, length(mock.surface))
values[V(g)$name] <- pg$vector
page.rank.raster <- setValues(mock.surface, values)
plot(page.rank.raster, interpolate = F)
points(amt_fisher %>% filter(name == "Leroy"), pch = ".", col = alpha("black", 0.1))
rsf.data <- amt_fisher %>%
filter(name == "Leroy") %>%
random_points() %>%
extract_covariates(amt_fisher_covar$landuse) %>%
extract_covariates(amt_fisher_covar$elevation) %>%
extract_covariates(amt_fisher_covar$slope) %>%
extract_covariates(amt_fisher_covar$aspect) %>%
extract_covariates(amt_fisher_covar$TRI) %>%
extract_covariates(amt_fisher_covar$popden) %>%
mutate(landC = factor(landuse),
forest = factor(landC == 50))
rsf <- rsf.data %>%
fit_rsf(case_ ~ (elevation + tri + popden + slope + aspect)^2, model = T)
summary(rsf)
##
## Call:
## stats::glm(formula = formula, family = stats::binomial(link = "logit"),
## data = data, model = ..1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7905 -0.4455 -0.3744 -0.3139 2.7347
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.146e+01 2.205e+00 -5.197 2.02e-07 ***
## elevation 7.229e-02 2.228e-02 3.244 0.001179 **
## tri 7.913e-01 4.890e-01 1.618 0.105611
## popden 1.488e-02 2.443e-03 6.093 1.11e-09 ***
## slope 2.514e-01 3.897e-01 0.645 0.518909
## aspect -6.944e-04 6.243e-03 -0.111 0.911437
## elevation:tri -1.938e-03 4.546e-03 -0.426 0.669894
## elevation:popden -1.225e-04 2.518e-05 -4.865 1.14e-06 ***
## elevation:slope -2.318e-03 3.700e-03 -0.626 0.531025
## elevation:aspect 4.272e-05 6.387e-05 0.669 0.503540
## tri:popden -7.047e-04 1.932e-04 -3.648 0.000264 ***
## tri:slope 2.065e-03 1.126e-02 0.183 0.854451
## tri:aspect -5.436e-04 3.895e-04 -1.396 0.162864
## popden:slope -1.270e-04 1.487e-04 -0.854 0.392860
## popden:aspect 1.085e-06 1.331e-06 0.815 0.415164
## slope:aspect -8.562e-04 2.830e-04 -3.025 0.002483 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6159.1 on 10108 degrees of freedom
## Residual deviance: 5755.8 on 10093 degrees of freedom
## AIC: 5787.8
##
## Number of Fisher Scoring iterations: 5
probabilities <- exp(predict(rsf$model, newdata = pred.data))/(1+exp(predict(rsf$model, newdata = pred.data)))
rsf.prob.raster <- setValues(mock.surface, probabilities)
plot(rsf.prob.raster)
points(amt_fisher %>% filter(name == "Leroy"), pch = ".", col = alpha("black", 0.25))
Lots of interesting differences from the RSF.
plot(spatialEco::rasterCorrelation(page.rank.raster, rsf.prob.raster))
points(deer, pch = ".", col = alpha("black", 0.25))
Here, we are leveraging the fact that there are other fisher that we never modeled to test our inferences.
ssf1 <- amt_fisher %>%
filter(name == "Leroy") %>%
extract_covariates(stack(rsf.prob.raster, page.rank.raster))
quantiles.pred <- quantile(values(rsf.prob.raster), seq(0.01, 1, by = 0.01), na.rm = T)
percent.in.a <- rep(0,100)
for (i in 1:(length(quantiles.pred))){
yup <- ssf1$layer.1 <= quantiles.pred[i]
percent.in.a[i] <- sum(yup, na.rm = T)
}
percent.in.a <- percent.in.a/table(is.na(ssf1$layer.1))[1]
quantiles.pred <- quantile(values(page.rank.raster), seq(0.01, 1, by = 0.01), na.rm = T)
percent.in.b <- rep(0,100)
for (i in 1:(length(quantiles.pred))){
yup <- ssf1$layer.2 <= quantiles.pred[i]
percent.in.b[i] <- sum(yup, na.rm = T)
}
percent.in.b <- percent.in.b/table(is.na(ssf1$layer.2))[1]
tibble(percent_pred = rep(seq(0, 1, by = 0.01)[-1],2),
precent_obs = c(1-percent.in.a,
1-percent.in.b),
pred_type = rep(c("RSF", "PageRank iSFF"),
each = 100)) %>%
group_by(pred_type) %>%
# mutate(cum_percent = cumsum(precent_obs)) %>%
ggplot(aes(x = percent_pred, y = precent_obs, color = pred_type)) +
geom_point() +
geom_line() +
theme_minimal() +
labs(x = "Percentile of Predicted Surface",
y = "Cumulative Percent of Observed Data by Percentile")
plot(page.rank.raster>quantiles.pred[min(which((1-percent.in.b) < 0.9))])
points(amt_fisher, pch = ".", col = alpha("black", 0.1))
This is data for a 59 sheep from SW Alberta, with elevation, forest canopy, distance to escape terrain, land cover, NDVI amplitude and maximum, slope, and vector ruggedness. I trimmed the data set for the movement models down to 4 individuals.
files <- c("aspect.tif","canopy.tif","descp37.tif","elev.tif","escp37.tif","lc_for.tif","lc_grass.tif","lc_other.tif","lc_rock.tif","lc_shrub.tif","ndvi_amp.tif","ndvi_max.tif","slope.tif","vrm3.tif")
raster_list <- c()
for (i in 1:length(files)) {
tmp <- str_remove(files[i], ".tif")
name <- paste0("~/Downloads/Research/BHS_Mvmt/Data/RSF-2/GIS/writtenrasters/",
files[i])
raster_list <- append(raster_list, assign(tmp, raster(name)))
}
rstack <- stack(raster_list)
rstack. <- aggregate(rstack, 50)
# plot(rstack.)
length(rstack.)
## [1] 58548
plot(sin(pi*rstack$aspect))
The movement tracks are already in individual step formats. I also appended some info from a HMM segmentation.
processed <- readRDS("~/Downloads/Research/BHS_Mvmt/Sheep_Mvmt/Manuscript Code/Processed.rds")
four.ind <- processed %>%
filter(ID %in% c("282_2008", "593_2008", "600_2007", "677_2005")) %>%
group_by(ID) %>%
distinct(dt, .keep_all = T) %>%
ungroup() %>%
mutate(date_time = as.POSIXct(date_time)) %>%
dplyr::select(x = x.1, y = y.1,
t = date_time, id = ID, age = age, state = states) %>%
arrange_at(c("id", "t")) %>%
nest(data = -c("id")) %>%
mutate(trk = lapply(data,
function(d) {
make_track(d, x, y, t,
age = age,
state = state, # this is from a HMM segmentation in three de novo states
crs = sp::CRS("+proj=utm +zone=11"))
})) %>%
mutate(steps = map(trk, function(x) {
x %>%
track_resample(rate = minutes(30), tolerance = minutes(10)) %>%
steps_by_burst(keep_cols = "start") %>%
random_steps(15) %>%
extract_covariates(rstack.) %>%
mutate(
# lc_shrub = factor(lc_shrub),
# lc_for = factor(lc_for),
# lc_grass = factor(lc_grass),
# lc_other = factor(lc_other),
# lc_rock = factor(lc_rock),
# escp37 = factor(escp37),
cos_ta = cos(ta_),
log_sl = log(sl_+1))
}))
## Warning: 'make_track' is deprecated.
## Use 'It looks like you used `CRS()` to create the crs,
## please use the ESPG directly.' instead.
## See help("Deprecated")
## Warning: 'make_track' is deprecated.
## Use 'It looks like you used `CRS()` to create the crs,
## please use the ESPG directly.' instead.
## See help("Deprecated")
## Warning: 'make_track' is deprecated.
## Use 'It looks like you used `CRS()` to create the crs,
## please use the ESPG directly.' instead.
## See help("Deprecated")
## Warning: 'make_track' is deprecated.
## Use 'It looks like you used `CRS()` to create the crs,
## please use the ESPG directly.' instead.
## See help("Deprecated")
m2 <- four.ind %>%
mutate(model = map(steps, function(x) {
x %>%
fit_clogit(case_ ~ log_sl + poly(canopy,2) + poly(vrm3,2) + poly(lc_grass,1) + poly(lc_shrub,2) + poly(lc_rock,2) + poly(ndvi_amp,1) + cos_ta + strata(step_id_), model = T)
}))
mock.surface <- create_mock_surface(rstack., F, list(x = 1250, y = 1250))
Since we have multiple models fit, we can just use the first model to get everything into shape.
pred.data <- get_cells(m2$model[[1]],
mock.surface,
rstack.,
accessory.x.preds = list(log_sl = log(step_distance(m2$model[[1]], 0.5)),
cos_ta = 1))
pred.data <- pred.data %>%
arrange(cellnr)
# pred.data$lc_for <- factor(pred.data$lc_for)
# pred.data$lc_grass <- factor(pred.data$lc_grass)
# pred.data$lc_other <- factor(pred.data$lc_other)
# pred.data$lc_shrub <- factor(pred.data$lc_shrub)
# pred.data$lc_rock <- factor(pred.data$lc_rock)
# pred.data$escp37 <- factor(pred.data$escp37)
cell.data <- get_cell_data(m2$model[[1]], pred.data)
plot(setValues(mock.surface, cell.data$lRSS), useRaster = T, interpolate = F)
lines(m2$trk[[1]])
cell.data.list <- pbmclapply(as.list(1:dim(cell.data)[1]), function(x) cell.data[x[1],])
neighbors.found <- neighbor_lookup(mock.surface, cell.data, cell.data.list)
## [1] "Using inputted list of cell data"
sparse.neighbors <- neighbor_finder(m2$model[[3]], cell.data, neighbors.found, quantile = 0.999999999999999, cell.data.list)
## [1] "Creating neighbor comparisons"
## [1] "Finding valid comparisons"
## [1] "Using inputted list of cell data"
## [1] "Running comparisons"
ssf.comparisons <- compile_ssf_comparisons(sparse.neighbors, cell.data)
ssf.comparisons <- lapply(ssf.comparisons, function(x) {
x$.for$log_sl <- log(x$.for$step+1)
x$.given$log_sl <- log(x$.given$step+1)
x$.for$cos_ta <- 0
x$.given$cos_ta <- 0
list(.for = x$.for, .given = x$.given)
})
TIDYVERSE BABYYYYYY
predicted.surfaces <- m2 %>%
mutate(surfaces = map(model, function(x) {
predict_ssf_comparisons(x, ssf.comparisons)
}))
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
## [1] "Estimating probability surface"
## [1] "Compiling probability surface"
## [1] "Making sparse matrix for transitions"
TIDYVERSE AGAIN BABYYYYYY #roUND 2
graphs <- predicted.surfaces %>%
mutate(graph = map(surfaces, function(x) {
g <- graph_from_adjacency_matrix(x$sparse.matrix, mode = "directed", weighted = T, diag = T)
V(g)$name <- V(g)
Isolated <- which(degree(g)==0)
Connected <- which(degree(g)>0)
g <- delete.vertices(g, Isolated)
g <- delete.edges(g, E(g)[is.na(weight)])
g
}))
centrality.surface <- graphs %>%
mutate(centrality = map(graph, function(x) {
pg <- page_rank(x)
values <- rep(NA, length(mock.surface))
values[V(x)$name] <- pg$vector
page.rank.raster <- setValues(mock.surface, values)
page.rank.raster
}))
par(mfrow = c(2,2))
plot(centrality.surface$centrality[[1]]^(1/2), interpolate = F)
points(centrality.surface$data[[1]], pch = ".", col = alpha("black", 0.1))
plot(centrality.surface$centrality[[2]], interpolate = F)
points(centrality.surface$data[[2]], pch = ".", col = alpha("black", 0.1))
plot(centrality.surface$centrality[[3]], interpolate = F)
points(centrality.surface$data[[3]], pch = ".", col = alpha("black", 0.1))
plot(centrality.surface$centrality[[4]], interpolate = F)
points(centrality.surface$data[[4]], pch = ".", col = alpha("black", 0.1))
cor(cbind(values(centrality.surface$centrality[[1]]),
values(centrality.surface$centrality[[2]]),
values(centrality.surface$centrality[[3]]),
values(centrality.surface$centrality[[4]])),
use = "pairwise.complete",
method = "spearman")
## [,1] [,2] [,3] [,4]
## [1,] 1.0000000 0.3814032 0.4874355 0.6800330
## [2,] 0.3814032 1.0000000 0.8317294 0.3456686
## [3,] 0.4874355 0.8317294 1.0000000 0.4598812
## [4,] 0.6800330 0.3456686 0.4598812 1.0000000
Here, we are leveraging the fact that there are other fisher that we never modeled to test our inferences.
comparison <- processed
extracted <- raster::extract(stack(centrality.surface$centrality), as.matrix(comparison[,c("x.1", "y.1")]), df=TRUE)
quantiles <- lapply(centrality.surface$centrality, function(x){
quantile(values(x), seq(0.01, 1, by = 0.01), na.rm = T)
})
percent.in.1 <- rep(0,100)
for (i in 1:(length(percent.in.1))){
yup <- extracted$layer.1 <= quantiles[[1]][i]
percent.in.1[i] <- sum(yup, na.rm = T)
}
percent.in.1 <- percent.in.1/table(is.na(extracted$layer.1))[1]
percent.in.2 <- rep(0,100)
for (i in 1:(length(percent.in.2))){
yup <- extracted$layer.2 <= quantiles[[1]][i]
percent.in.2[i] <- sum(yup, na.rm = T)
}
percent.in.2 <- percent.in.2/table(is.na(extracted$layer.2))[1]
percent.in.3 <- rep(0,100)
for (i in 1:(length(percent.in.3))){
yup <- extracted$layer.3 <= quantiles[[1]][i]
percent.in.3[i] <- sum(yup, na.rm = T)
}
percent.in.3 <- percent.in.3/table(is.na(extracted$layer.3))[1]
percent.in.4 <- rep(0,100)
for (i in 1:(length(percent.in.4))){
yup <- extracted$layer.4 <= quantiles[[1]][i]
percent.in.4[i] <- sum(yup, na.rm = T)
}
percent.in.4 <- percent.in.4/table(is.na(extracted$layer.4))[1]
tibble(percent_pred = rep(seq(0, 1, by = 0.01)[-1],4),
precent_obs =
# c(NA,diff(percent.in.1),
# NA,diff(percent.in.2),
# NA,diff(percent.in.3),
# NA,diff(percent.in.4)),
c(1-percent.in.1,
1-percent.in.2,
1-percent.in.3,
1-percent.in.4),
pred_type = rep(paste("Animal", 1:4),
each = 100)) %>%
group_by(pred_type) %>%
# mutate(cum_percent = cumsum(precent_obs)) %>%
ggplot(aes(x = percent_pred, y = precent_obs, color = pred_type)) +
geom_point() +
geom_line() +
theme_minimal() +
labs(x = "Percentile of Predicted Surface",
y = "Cumulative Percent of Observed Data by Percentile")
Variation in animal predictions!!! Super cooooooool
pheno <- processed[processed$ID %in% four.ind$id,]
as.data.frame(pheno) %>%
distinct(ID,sex, age)
## ID sex age
## 1 282_2008 F 11
## 2 593_2008 F Adult
## 3 600_2007 F 2
## 4 677_2005 M 5
par(mfrow = c(2,2))
plot(centrality.surface$centrality[[1]]>quantiles[[1]][min(which((1-percent.in.1) < 0.5))])
points(comparison$x_, comparison$y_, pch = ".", col = alpha("black", alpha = 1))
plot(centrality.surface$centrality[[2]]>quantiles[[2]][min(which((1-percent.in.2) < 0.5))])
points(comparison$x_, comparison$y_, pch = ".", col = alpha("black", alpha = 1))
plot(centrality.surface$centrality[[3]]>quantiles[[3]][min(which((1-percent.in.3) < 0.5))])
points(comparison$x_, comparison$y_, pch = ".", col = alpha("black", alpha = 1))
plot(centrality.surface$centrality[[4]]>quantiles[[4]][min(which((1-percent.in.4) < 0.5))])
points(comparison$x_, comparison$y_, pch = ".", col = alpha("black", alpha = 1))